# Install required packages if not already installed
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c("SNPRelate", "GENESIS", "qqman", "GeneNet", "xlsx", "GWASTools", "SeqVarTools", "SeqArray", "RCy3"))
if(!require("SNPRelate", quietly = TRUE))
install.packages("SNPRelate")
if(!require("GENESIS", quietly = TRUE))
install.packages("GENESIS")
if(!require("dplyr", quietly = TRUE))
install.packages("dplyr")
if(!require("openxlsx", quietly = TRUE))
install.packages("openxlsx")
if(!require("readxl", quietly = TRUE))
install.packages("readxl")
if(!require("ggplot2", quietly = TRUE))
install.packages("ggplot2")
if(!require("qqman", quietly = TRUE))
install.packages("qqman")
# Load required libraries
library(SNPRelate)
library(GENESIS)
library(GWASTools)
library(SeqVarTools)
library(SeqArray)
library(dplyr)
library(openxlsx)
library(readxl)
library(ggplot2)
library(qqman)
# Convert PLINK format to GDS format
snpgdsPED2GDS("Dataset/input.ped", "Dataset/input.map", "Dataset/genotype.gds")
# Open the GDS file
genofile <- snpgdsOpen("Dataset/genotype.gds")
# Calculate kinship using MLE method
ibd <- snpgdsIBDMLE(genofile,
num.thread = 6,
kinship.constraint = TRUE,
kinship = TRUE)
kinship <- ibd$kinship
rownames(kinship) <- colnames(kinship) <- ibd$sample.id
# Close the GDS file
snpgdsClose(genofile)
# Count individuals with kinship > 0.1
high_kinship <- (sum(kinship > 0.1, na.rm = TRUE) - 156) / 2.0
cat("Number of individuals with kinship > 0.1:", high_kinship, "\n")
write.xlsx(kinship, file = "Dataset/Kinship_Matrix.xlsx", rowNames = TRUE)
kinship <- as.matrix(read.xlsx("Dataset/Kinship_Matrix.xlsx", rowNames = TRUE))
# Load PCA data
pca_data <- read.table("Dataset/Dataset of 156 Qataris/pca_results.eigenvec", header = FALSE)
colnames(pca_data) <- c("FID", "IID", paste0("PC", 1:20))
rownames(pca_data) <- pca_data$IID
pcs <- pca_data[, c("PC1", "PC2", "PC3")]
# Load metabolite data
metabolites <- read.csv("Dataset/qatari_metabolites_2025.csv", row.names = 1)
# Merge PCA with metabolite data
all.data <- as.data.frame(cbind(pcs, metabolites))
meta.names <- colnames(metabolites)
# Sample IDs
scan_annot_df <- data.frame(
scanID = rownames(metabolites),
stringsAsFactors = FALSE
)
rownames(scan_annot_df) <- scan_annot_df$scanID
scan_annot <- ScanAnnotationDataFrame(scan_annot_df)
genoFile <- "Dataset/genotype.gds"
gds <- GdsGenotypeReader(genoFile)
genoData <- GenotypeData(gds, scanAnnot = scan_annot)
results <- list()
null_models <- list()
for (meta in meta.names) {
df <- all.data[, c("PC1", "PC2", "PC3", meta)]
colnames(df)[4] <- "trait"
null.model <- fitNullModel(
x = df,
outcome = "trait",
covars = c("PC1", "PC2", "PC3"),
cov.mat = kinship,
family = "gaussian",
verbose = TRUE
)
genoIterator <- GenotypeBlockIterator(genoData)
assoc <- assocTestSingle(
gdsobj = genoIterator,
null.model = null.model,
verbose = TRUE
)
null_models[[meta]] <- null.model
assoc$metabolite <- meta
results[[meta]] <- assoc
}
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 140.150908 140.150908 -646.874628 1.350861
## [1] 21.354672 210.457918 -646.557174 1.269528
## [1] 7.279477 218.697709 -646.527236 1.261173
## [1] 0.328382 222.764840 -646.512966 1.257109
## [1] 0.1125786 222.8910805 -646.5125283 1.2569830
## [1] 0.000000 222.954187 -646.512300 1.256933
## [1] 0.000000 268.528849 -646.512300 1.043607
## [1] 0.000000 279.749204 -646.512300 1.001749
## [1] 0.000000 280.237632 -646.512300 1.000003
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 142.31419 142.31419 -648.93409 1.36687
## [1] 417.72541 64.47409 -648.82217 1.07896
## [1] 504.339488 32.141102 -648.808332 1.040985
## [1] 553.922749 13.520060 -648.804425 1.020921
## [1] 580.055431 3.735468 -648.803434 1.010575
## [1] 586.633755 1.291592 -648.803295 1.007939
## [1] 589.0900642 0.3820982 -648.8032535 1.0069476
## [1] 589.6259740 0.1839682 -648.8032453 1.0067305
## [1] 589.88538667 0.08809481 -648.80324142 1.00662534
## [1] 590.01302450 0.04093073 -648.80323954 1.00657358
## [1] 590.07633 0.00000 -648.80324 1.00661
## [1] 593.950907 0.000000 -648.803238 1.000043
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 138.26954 138.26954 -646.07942 1.35499
## [1] 411.716609 30.907443 -645.748661 1.200057
## [1] 475.975517 4.360440 -645.692907 1.175653
## [1] 483.557490 1.214955 -645.686793 1.172916
## [1] 485.4382023 0.4344023 -645.6852918 1.1722413
## [1] 486.37670238 0.04485858 -645.68454466 1.17190501
## [1] 486.435301 0.000000 -645.684459 1.171985
## [1] 557.818278 0.000000 -645.684459 1.022009
## [1] 569.830737 0.000000 -645.684459 1.000464
## [1] 570.0950 0.0000 -645.6845 1.0000
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 160.421757 160.421757 -655.088129 1.314857
## [1] 389.875348 61.635530 -654.705739 1.241815
## [1] 499.680186 13.908812 -654.555711 1.212554
## [1] 525.881455 2.495458 -654.522693 1.205961
## [1] 529.112725 1.087391 -654.518692 1.205158
## [1] 530.7255707 0.3845445 -654.5167009 1.2047572
## [1] 531.53129438 0.03341862 -654.51570773 1.20455725
## [1] 531.581630 0.000000 -654.515613 1.204598
## [1] 621.869435 0.000000 -654.515613 1.029705
## [1] 639.809230 0.000000 -654.515613 1.000833
## [1] 640.341690 0.000000 -654.515613 1.000001
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 172.144976 172.144976 -659.273384 1.294684
## [1] 35.702989 267.246050 -659.178850 1.163019
## [1] 6.584969 286.380269 -659.166335 1.143013
## [1] 3.211298 288.586812 -659.165016 1.140783
## [1] 1.540975 289.678800 -659.164372 1.139685
## [1] 0.7099403 290.2219800 -659.1640543 1.1391392
## [1] 0.2954546 290.4928674 -659.1638964 1.1388675
## [1] 0.08846968 290.62813550 -659.16381766 1.13873190
## [1] 0.03675566 290.66193060 -659.16379801 1.13869803
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 121.245311 121.245311 -634.579372 1.328256
## [1] 233.338879 111.172919 -634.564210 1.065211
## [1] 319.532549 82.985884 -634.557998 1.003869
## [1] 349.929588 69.133397 -634.557133 1.000032
## [1] 358.335712 65.058841 -634.557062 1.000001
## [1] 360.70381 63.90936 -634.55706 1.00000
## [1] 361.37285 63.58455 -634.55706 1.00000
## [1] 361.56199 63.49272 -634.55706 1.00000
## [1] 361.61548 63.46675 -634.55706 1.00000
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 156.52596 156.52596 -655.64424 1.35748
## [1] 48.23097 216.13551 -655.41039 1.31459
## [1] 19.787112 231.612743 -655.354057 1.304855
## [1] 5.438761 239.401757 -655.326345 1.300114
## [1] 1.837479 241.354535 -655.319461 1.298943
## [1] 0.03515157 242.33157265 -655.31602637 1.29835984
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 123.996592 123.996592 -639.338760 1.382719
## [1] 60.871044 157.068136 -639.003531 1.359475
## [1] 28.060549 174.188048 -638.839321 1.348401
## [1] 11.386016 182.873897 -638.758257 1.342995
## [1] 2.987092 187.245608 -638.718008 1.340325
## [1] 0.8800161 188.3419618 -638.7079711 1.3396611
## [1] 0.3527979 188.6162597 -638.7054634 1.3394953
## [1] 0.08913293 188.75343478 -638.70420979 1.33941246
## [1] 0.00000 188.78773 -638.70379 1.33947
## [1] 0.000000 236.633363 -638.703786 1.068639
## [1] 0.000000 251.832293 -638.703786 1.004143
## [1] 0.000000 252.871221 -638.703786 1.000017
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 161.385244 161.385244 -656.142070 1.325259
## [1] 38.484860 227.189174 -655.739495 1.286463
## [1] 7.468218 243.720938 -655.648218 1.277610
## [1] 3.598531 245.781954 -655.637095 1.276527
## [1] 1.664369 246.812015 -655.631558 1.275988
## [1] 0.6974669 247.3269292 -655.6287948 1.2757181
## [1] 0.2140616 247.5843569 -655.6274148 1.2755835
## [1] 0.09321604 247.64871016 -655.62706997 1.27554983
## [1] 0.032794 247.680886 -655.626898 1.275533
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 119.222004 119.222004 -634.668187 1.352377
## [1] 72.31691 188.16110 -634.65008 1.07306
## [1] 34.572058 221.749578 -634.647680 1.004712
## [1] 33.875045 223.208366 -634.647680 1.000022
## [1] 33.97702 223.16392 -634.64768 1.00000
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 126.182791 126.182791 -639.710426 1.365424
## [1] 16.109325 191.722220 -639.503496 1.280448
## [1] 2.155260 199.834267 -639.481554 1.271753
## [1] 0.4183262 200.8420620 -639.4788802 1.2706921
## [1] 0.2013401 200.9679317 -639.4785470 1.2705599
## [1] 0.09285534 201.03085991 -639.47838052 1.27049387
## [1] 0.03861503 201.06232238 -639.47829728 1.27046085
## [1] 0.000000 201.078053 -639.478238 1.270479
## [1] 0.000000 243.886684 -639.478238 1.047476
## [1] 0.000000 254.940734 -639.478238 1.002059
## [1] 0.000000 255.464462 -639.478238 1.000004
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 150.583257 150.583257 -652.854433 1.360195
## [1] 46.569163 230.637049 -652.796187 1.201497
## [1] 18.016408 249.827625 -652.784980 1.176845
## [1] 4.077068 259.036820 -652.779997 1.165899
## [1] 0.6481105 261.2855537 -652.7788176 1.1633132
## [1] 0.2213884 261.5649227 -652.7786720 1.1629943
## [1] 0.000000 261.704499 -652.778597 1.162853
## [1] 0.000000 298.355137 -652.778597 1.020005
## [1] 0.000000 304.206731 -652.778597 1.000385
## [1] 0.0000 304.3237 -652.7786 1.0000
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 151.852679 151.852679 -651.778154 1.329857
## [1] 12.783350 275.359797 -651.735319 1.065982
## [1] 0.5352815 282.4135798 -651.7329250 1.0618756
## [1] 0.1585763 282.6292272 -651.7328528 1.0617547
## [1] 0.06444825 282.68310193 -651.73283481 1.06172456
## [1] 0.000000 282.710035 -651.732822 1.061741
## [1] 0.000000 299.149902 -651.732822 1.003393
## [1] 0.000000 300.161489 -651.732822 1.000011
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 126.628339 126.628339 -638.435559 1.337986
## [1] 50.251834 169.409202 -638.303894 1.297236
## [1] 8.702666 192.262902 -638.237318 1.278975
## [1] 3.321966 195.200179 -638.228912 1.276800
## [1] 0.620575 196.673553 -638.224709 1.275720
## [1] 0.2822175 196.8580172 -638.2241838 1.2755852
## [1] 0.1129962 196.9502678 -638.2239210 1.2755179
## [1] 0.02837488 196.99639756 -638.22378959 1.27548428
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 122.896207 122.896207 -636.577454 1.345321
## [1] 273.275868 60.803516 -636.348322 1.264913
## [1] 347.745361 29.441973 -636.251661 1.232802
## [1] 383.793297 14.176274 -636.208468 1.218457
## [1] 401.431382 6.691432 -636.188159 1.211676
## [1] 410.14523 2.99038 -636.17832 1.20838
## [1] 414.474912 1.150673 -636.173483 1.206755
## [1] 416.632832 0.233580 -636.171083 1.205948
## [1] 417.171440 0.000000 -636.170473 1.205774
## [1] 488.364830 0.000000 -636.170473 1.029998
## [1] 502.587940 0.000000 -636.170473 1.000849
## [1] 503.014237 0.000000 -636.170473 1.000001
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 190.736956 190.736956 -669.908268 1.343988
## [1] 593.982205 66.385958 -669.797092 1.071266
## [1] 683.872814 28.574067 -669.779845 1.053801
## [1] 722.47785 12.12013 -669.77339 1.04713
## [1] 740.311629 4.480398 -669.770599 1.044195
## [1] 748.8795236 0.8017044 -669.7693015 1.0428158
## [1] 749.9292910 0.3504937 -669.7691444 1.0426486
## [1] 750.4528689 0.1254204 -669.7690662 1.0425653
## [1] 750.714332 0.000000 -669.769023 1.042561
## [1] 781.361120 0.000000 -669.769023 1.001669
## [1] 782.663304 0.000000 -669.769023 1.000003
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 155.733467 155.733467 -654.507323 1.344129
## [1] 355.166860 72.722910 -654.282169 1.263951
## [1] 462.90728 26.43672 -654.17315 1.23197
## [1] 517.572634 2.699657 -654.120544 1.217684
## [1] 520.996667 1.206249 -654.117301 1.216834
## [1] 522.7087181 0.4593385 -654.1156818 1.2164108
## [1] 523.5647486 0.0858339 -654.1148729 1.2161995
## [1] 523.67175257 0.03914279 -654.11477180 1.21617309
## [1] 523.725255 0.000000 -654.114687 1.216235
## [1] 616.838629 0.000000 -654.114687 1.032641
## [1] 636.3366 0.0000 -654.1147 1.0010
## [1] 636.972367 0.000000 -654.114687 1.000001
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 157.322856 157.322856 -653.464436 1.312417
## [1] 506.167423 13.312896 -652.972372 1.175852
## [1] 527.184477 4.255995 -652.950370 1.170391
## [1] 532.384927 2.012384 -652.945035 1.169062
## [1] 534.9782567 0.8932444 -652.9423906 1.1684022
## [1] 536.2731859 0.3343478 -652.9410742 1.1680734
## [1] 536.92021510 0.05506854 -652.94041747 1.16790933
## [1] 537.001066 0.000000 -652.940288 1.167979
## [1] 614.232527 0.000000 -652.940288 1.021121
## [1] 626.937420 0.000000 -652.940288 1.000428
## [1] 627.2056 0.0000 -652.9403 1.0000
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 136.049954 136.049954 -644.498831 1.348752
## [1] 140.972187 185.950193 -644.497171 1.071668
## [1] 122.330293 212.050481 -644.496435 1.004505
## [1] 110.592441 218.985607 -644.496320 1.000022
## [1] 106.8428 220.8184 -644.4963 1.0000
## [1] 105.7302 221.3605 -644.4963 1.0000
## [1] 105.4003 221.5212 -644.4963 1.0000
## [1] 105.3025 221.5689 -644.4963 1.0000
## [1] 105.2735 221.5830 -644.4963 1.0000
## Using 6 CPU cores
## Computing Variance Component Estimates...
## Sigma^2_A log-lik RSS
## [1] 142.960225 142.960225 -648.652339 1.355658
## [1] 9.028350 221.545197 -648.476636 1.272749
## [1] 4.438819 224.145937 -648.471299 1.270623
## [1] 2.141191 225.446944 -648.468641 1.269567
## [1] 0.9916964 226.0975906 -648.4673143 1.2690404
## [1] 0.4167828 226.4229477 -648.4666517 1.2687777
## [1] 0.1292849 226.5856344 -648.4663206 1.2686465
## [1] 0.0574054 226.6263071 -648.4662378 1.2686137
## [1] 0.000000 226.646644 -648.466172 1.268655
## [1] 0.000000 274.642226 -648.466172 1.046949
## [1] 0.000000 286.958271 -648.466172 1.002015
## [1] 0.000000 287.535339 -648.466172 1.000004
## Using 6 CPU cores
head(results$Metabolite5)
## variant.id chr pos n.obs freq MAC Score Score.SE
## 1 1 1 1120590 156 0.9102564 28 -0.041612740 0.3013262
## 2 2 1 1500664 155 0.5612903 136 -0.592421817 0.4601268
## 3 3 1 1748914 156 0.7403846 81 -0.622250487 0.4715085
## 4 4 1 1802548 156 0.6858974 98 0.003261458 0.4753489
## 5 5 1 1813782 156 0.8653846 42 0.058626020 0.3435269
## 6 6 1 2017761 156 0.6987179 94 -0.044858793 0.4453027
## Score.Stat Score.pval Est Est.SE PVE metabolite
## 1 -0.138098653 0.8901625 -0.4583029 3.318663 1.101861e-04 Metabolite5
## 2 -1.287518486 0.1979136 -2.7981816 2.173314 9.577558e-03 Metabolite5
## 3 -1.319701651 0.1869346 -2.7988929 2.120853 1.006235e-02 Metabolite5
## 4 0.006861187 0.9945256 0.0144340 2.103718 2.719859e-07 Metabolite5
## 5 0.170659168 0.8644918 0.4967854 2.910980 1.682702e-04 Metabolite5
## 6 -0.100737756 0.9197586 -0.2262231 2.245664 5.863169e-05 Metabolite5
dim(results$Metabolite5)
## [1] 67735 14
close(gds)
all_results <- bind_rows(results)
significant_results <- filter(all_results, Score.pval < 0.0001)
dim(significant_results)
## [1] 206 14
write.xlsx(significant_results, file = "Dataset/Significant_results.xlsx", rowNames = FALSE)
write.xlsx(all_results, file = "Dataset/All_results.xlsx", rowNames = FALSE)
heritability_results <- lapply(names(null_models), function(met) {
model <- null_models[[met]]
h2_ci <- varCompCI(model, prop = TRUE)
print(h2_ci)
})
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 0.000126439 -5.033504 5.033756
## V_resid.var 0.999873561 -4.033756 6.033504
## Proportion Lower 95 Upper 95
## V_A 0.000126439 -5.033504 5.033756
## V_resid.var 0.999873561 -4.033756 6.033504
## Proportion Lower 95 Upper 95
## V_A 0.8506954 -1.210683 2.912073
## V_resid.var 0.1493046 -1.912073 2.210683
## Proportion Lower 95 Upper 95
## V_A 0.8506954 -1.210683 2.912073
## V_resid.var 0.1493046 -1.912073 2.210683
## Proportion Lower 95 Upper 95
## V_A 0.0001450346 -5.631103 5.631393
## V_resid.var 0.9998549654 -4.631393 6.631103
## Proportion Lower 95 Upper 95
## V_A 0.0001450346 -5.631103 5.631393
## V_resid.var 0.9998549654 -4.631393 6.631103
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0.0001323867 -4.660251 4.660515
## V_resid.var 0.9998676133 -3.660515 5.660251
## Proportion Lower 95 Upper 95
## V_A 0.0001323867 -4.660251 4.660515
## V_resid.var 0.9998676133 -3.660515 5.660251
## Proportion Lower 95 Upper 95
## V_A 0.1321338 -4.621856 4.886124
## V_resid.var 0.8678662 -3.886124 5.621856
## Proportion Lower 95 Upper 95
## V_A 0.1321338 -4.621856 4.886124
## V_resid.var 0.8678662 -3.886124 5.621856
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0.0001440168 -6.646524 6.646812
## V_resid.var 0.9998559832 -5.646812 7.646524
## Proportion Lower 95 Upper 95
## V_A 0.0001440168 -6.646524 6.646812
## V_resid.var 0.9998559832 -5.646812 7.646524
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 1 1 1
## V_resid.var 0 NA NA
## Proportion Lower 95 Upper 95
## V_A 0.3220787 -4.319544 4.963702
## V_resid.var 0.6779213 -3.963702 5.319544
## Proportion Lower 95 Upper 95
## V_A 0.3220787 -4.319544 4.963702
## V_resid.var 0.6779213 -3.963702 5.319544
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
## Proportion Lower 95 Upper 95
## V_A 0 NA NA
## V_resid.var 1 1 1
herit_df <- do.call(rbind, heritability_results)
# Function to compute lambda_GC from a vector of p‑values
calc_lambda <- function(p) {
# drop invalid p's
p <- p[!is.na(p) & p > 0 & p < 1]
# convert to chi^2 (df=1) and take the median
chisq_med <- median(qchisq(p, df = 1, lower.tail = FALSE))
# theoretical median of X^2 is qchisq(0.5,1)
lambda_gc <- chisq_med / qchisq(0.5, 1)
return(lambda_gc)
}
# Loop over your 'results' list
inflation_results <- lapply(names(results), function(met) {
df <- results[[met]]
lambda <- tryCatch(calc_lambda(df$Score.pval),
error = function(e) NA_real_)
data.frame(
metabolite = met,
lambda_gc = lambda,
stringsAsFactors = FALSE
)
})
inflation_df <- do.call(rbind, inflation_results)
# Compute the average lambda_GC across all metabolites
avg_lambda <- mean(inflation_df$lambda_gc, na.rm = TRUE)
# Add the average as a final summary row:
inflation_df <- rbind(
inflation_df,
data.frame(metabolite = "AVERAGE", lambda_gc = avg_lambda, stringsAsFactors = FALSE)
)
# Inspect
print(inflation_df)
## metabolite lambda_gc
## 1 Metabolite1 0.9998043
## 2 Metabolite2 1.0089874
## 3 Metabolite3 1.0055276
## 4 Metabolite4 1.0045638
## 5 Metabolite5 1.1350615
## 6 Metabolite6 0.9907329
## 7 Metabolite7 1.3154792
## 8 Metabolite8 1.0069210
## 9 Metabolite9 1.2877979
## 10 Metabolite10 1.0444745
## 11 Metabolite11 1.0472325
## 12 Metabolite12 0.9958541
## 13 Metabolite13 0.9915045
## 14 Metabolite14 1.2936886
## 15 Metabolite15 0.9743886
## 16 Metabolite16 1.0120871
## 17 Metabolite17 0.9850725
## 18 Metabolite18 1.0178531
## 19 Metabolite19 1.0230257
## 20 Metabolite20 0.9842626
## 21 AVERAGE 1.0562160
# 1. Load the results
df <- all_results
# 2. Rename & clean columns
df$SNP <- df$variant.id
df$BP <- as.numeric(df$pos)
df$P <- as.numeric(df$Score.pval)
df$Metabolite <- as.factor(df$metabolite)
df$chr[df$chr == "X"] <- "23"
df$CHR <- as.numeric(df$chr)
# 3. Output directory
out_dir <- "plots"
if (!dir.exists(out_dir)) dir.create(out_dir)
# 4. Thresholds
threshold_gw <- 1e-4
threshold_sugg <- 1e-5
# 5. Per-metabolite ggplot2 Manhattan plots
mets <- levels(df$Metabolite)
for (met in mets) {
subdf <- subset(df, Metabolite == met)
subdf <- subdf[complete.cases(subdf[, c("CHR","BP","P")]), ]
subdf <- subset(subdf, CHR %in% 1:23 & P > 0 & P <= 1)
if (nrow(subdf) < 10) {
warning(sprintf("Skipping %s: only %d valid SNPs", met, nrow(subdf)))
next
}
# Add -log10(P)
subdf$negLogP <- -log10(subdf$P)
#png(
# filename = file.path(out_dir, paste0("Manhattan_", met, ".png")),
# width = 12,
# height = 6,
# units = "in",
# res = 300
# )
p <- manhattan(subdf,
main = paste("Manhattan Plot —", met),
col = c("skyblue", "orange"),
suggestiveline = FALSE,
genomewideline = -log10(1e-4))
#dev.off()
}
df_unique <- df
df_unique$P <- -log10(df_unique$P)
# Save plot
#png("plots/Manhattan_plot_all_metabolites.png", width = 12, height = 6,units = "in", res = 300)
p_all <- manhattan(df_unique,
main = "Combined Manhattan Plot (All Metabolites)",
col = c("skyblue", "orange"),
suggestiveline = FALSE,
genomewideline = -log10(1e-4))
#dev.off()
library(dplyr)
library(GeneNet)
library(RCy3)
# Create a matrix to hold the corrected values
corrected_matrix <- matrix(NA, nrow = nrow(metabolites), ncol = length(meta.names))
rownames(corrected_matrix) <- rownames(metabolites)
colnames(corrected_matrix) <- meta.names
# Fill matrix with residuals from each null model
for (met in meta.names) {
model <- null_models[[met]]
if (!is.null(model)) {
fittedData <- model$fit$fitted.values
corrected_matrix[, met] <- fittedData
}
}
# Estimate partial correlations
pcor <- ggm.estimate.pcor(corrected_matrix)
# Test for significant associations
network_results <- network.test.edges(pcor)
# Filter significant pairs (pval < 0.05)
sig_pairs <- network_results %>%
filter(pval < 0.05) %>%
arrange(desc(abs(pcor)))
# Save results
write.csv(sig_pairs, "Dataset/significant_pairs.csv", row.names = FALSE)
cytoscapePing()
# Prepare edges and nodes
edges <- data.frame(
source = as.character(sig_pairs$node1),
target = as.character(sig_pairs$node2),
weight = sig_pairs$pcor,
interaction = "interacts_with",
pval = sig_pairs$pval,
qval = sig_pairs$qval,
stringsAsFactors = FALSE
)
nodes <- data.frame(
id = unique(c(edges$source, edges$target)),
name = unique(c(edges$source, edges$target)),
stringsAsFactors = FALSE
)
# Create network in Cytoscape
createNetworkFromDataFrames(
nodes = nodes,
edges = edges,
title = "Metabolic Network",
collection = "Metabolite Correlations"
)
# Apply layout and visual styles
layoutNetwork("force-directed")
setNodeShapeDefault("ELLIPSE")
setNodeSizeDefault(30)
setNodeLabelMapping("name")
setEdgeLineWidthMapping(
table.column = "weight",
table.column.values = seq(min(abs(edges$weight)), max(abs(edges$weight)), length.out = 5),
widths = seq(1, 3, length.out = 5),
mapping.type = "continuous"
)
setEdgeColorMapping(
table.column = "weight",
table.column.values = c(min(edges$weight), 0, max(edges$weight)),
colors = c("red", "blue", "yellow"),
mapping.type = "continuous"
)
analyzeNetwork()
node.table <- getTableColumns(table = "node")
write.csv(node.table, "metabolite_network_node_metrics.csv", row.names = FALSE)
saveSession("Metabolite_Network_Analysis.cys")
print(head(node.table[, c("name", "Degree", "ClosenessCentrality", "BetweennessCentrality")]))
map_data <- read.table("Dataset/input.map", header = FALSE, stringsAsFactors = FALSE)
# Assign column names
colnames(map_data) <- c("chr", "snp_id", "pos_cM", "pos")
map_data <- subset(map_data, select= -c(pos_cM))
# Read the Excel file
df <- read_excel("Dataset/Significant_results.xlsx", sheet = 1)
# Select top 20 unique SNPs based on lowest p-value
top_snps <- df[order(df$`Score.pval`), ]
top_snps <- top_snps[!duplicated(top_snps$`variant.id`), ]
top_snps <- subset(top_snps, select = c("chr", "pos"))
top_snps <- head(top_snps, 20)
top_snps$chr[top_snps$chr == "X"] <- 23
top_snps$chr <- as.integer(top_snps$chr)
top_20snps <- inner_join(top_snps, map_data, by = c("chr", "pos"))
# Save to a plain text file
writeLines(top_20snps$snp_id, "Dataset/snps_list.txt")
plink --file input --extract snps_list.txt --make-bed --out top20_SNPs
plink --bfile top20_SNPs --recode vcf --out top20_SNPs_vcf
sudo apt update
sudo apt install perl
perl -v
./annotate_variation.pl -buildver hg19 -downdb -webfrom annovar refGene humandb/
top20_SNPs_vcf.vcf: Input VCF file containing SNPs to annotate.
humandb/: Directory containing ANNOVAR’s annotation databases (e.g., refGene).
buildver hg19: Specifies the human genome version (hg19).
out output/annotated_output: Output prefix and directory for results.
remove: Delete intermediate files after annotation.
protocol refGene: Use the refGene database for annotation (gene-based).
operation g: The operation type is gene-based (g).
nastring .: Missing values in the output will be represented as ..
vcfinput: Indicates the input file is in VCF format.
polish: Refines annotations (e.g., handles multi-allelic variants better).
thread 4: Use 4 threads to speed up processing.
./table_annovar.pl top20_SNPs_vcf.vcf humandb/ -buildver hg19 -out output/annotated_output -remove -protocol refGene -operation g -nastring . -vcfinput -polish -thread 4
data <- read.table("Annovar/annotated_output.hg19_multianno.txt", header = TRUE, sep = "", stringsAsFactors = FALSE)
View(data)
df <- read_excel("Dataset/Significant_results.xlsx", sheet = 1)
top_snps <- df[order(df$`Score.pval`), ]
top_snps <- top_snps[!duplicated(top_snps$`variant.id`), ]
top_snps <- subset(top_snps, select = c("chr", "pos"))
top_snps <- head(top_snps, 5)
top_snps$chr[top_snps$chr == "X"] <- 23
top_snps$chr <- as.integer(top_snps$chr)
top_5snps <- inner_join(top_snps, map_data, by = c("chr", "pos"))
# Save to a plain text file
writeLines(top_5snps$snp_id, "Dataset/top5_SNPs.txt")